library(sjmisc)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x tibble::add_case() masks sjmisc::add_case()
## x dplyr::filter() masks stats::filter()
## x purrr::is_empty() masks sjmisc::is_empty()
## x dplyr::lag() masks stats::lag()
## x tidyr::replace_na() masks sjmisc::replace_na()
# Location of the remote CSV file analysed in this script
f <- "https://raw.githubusercontent.com/difiore/ada-2022-datasets/main/data-wrangling.csv"
# read_csv() returns a tibble; the first row of the file supplies column names
d <- read_csv(file = f, col_names = TRUE)
## Rows: 213 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Scientific_Name, Family, Genus, Species, Leaves, Fauna
## dbl (17): Brain_Size_Species_Mean, Body_mass_male_mean, Body_mass_female_mea...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the first rows and the column summary of the tibble
print(d)
## # A tibble: 213 × 23
## Scientific_Name Family Genus Species Brain_Size_Speci… Body_mass_male_…
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Allenopithecus_n… Cercopi… Alleno… nigrov… 58.0 6130
## 2 Allocebus_tricho… Cercopi… Alloce… tricho… NA 92
## 3 Alouatta_belzebul Atelidae Alouat… belzeb… 52.8 7270
## 4 Alouatta_caraya Atelidae Alouat… caraya 52.6 6525
## 5 Alouatta_guariba Atelidae Alouat… guariba 51.7 5800
## 6 Alouatta_palliata Atelidae Alouat… pallia… 49.9 7150
## 7 Alouatta_pigra Atelidae Alouat… pigra 51.1 11400
## 8 Alouatta_senicul… Atelidae Alouat… senicu… 55.2 6690
## 9 Aotus_azarai Cebidae Aotus azarai 20.7 1180
## 10 Aotus_brumbacki Cebidae Aotus brumba… NA NA
## # … with 203 more rows, and 17 more variables: Body_mass_female_mean <dbl>,
## # MeanGroupSize <dbl>, AdultMales <dbl>, AdultFemale <dbl>,
## # GR_MidRangeLat_dd <dbl>, Precip_Mean_mm <dbl>, Temp_Mean_degC <dbl>,
## # HomeRange_km2 <dbl>, DayLength_km <dbl>, Fruit <dbl>, Leaves <chr>,
## # Fauna <chr>, Canine_Dimorphism <dbl>, Feed <dbl>, Move <dbl>, Rest <dbl>,
## # Social <dbl>
Creating a variable named “bsd” (body size dimorphism): the ratio of male to female body mass
# bsd: mean male body mass divided by mean female body mass, per row of d
d[["bsd"]] <- d[["Body_mass_male_mean"]] / d[["Body_mass_female_mean"]]
d[["bsd"]]
## [1] 1.9276730 1.0952381 1.3170290 1.5389151 1.2747253 1.3364486 1.7729393
## [8] 1.2840691 0.9593496 NA 1.0512498 1.0179487 0.8413462 1.1190400
## [15] 1.0143266 1.0196078 0.9252275 0.9705240 1.0672154 1.0520446 0.8462127
## [22] 1.0910633 1.1908302 1.1191339 1.1979167 1.1660517 1.0902090 1.0582524
## [29] 1.0669456 0.9202899 1.0578512 1.0662393 0.9166667 NA NA
## [36] 0.9473684 0.9875776 NA 1.1029836 0.9016393 1.3886463 1.3590454
## [43] 1.3485738 1.2262940 1.2527346 1.6784452 1.8269962 1.7218182 1.7741935
## [50] 1.6372275 1.5607032 1.6666667 1.4895833 1.3333333 1.2413793 1.6339286
## [57] 1.7304348 1.6401469 1.7215514 1.7796610 1.5657277 1.4735111 1.4689655
## [64] NA 1.7576531 1.3623693 1.2099448 1.0071942 1.0472973 1.4478447
## [71] 1.4182001 1.3307027 1.2787318 1.2887711 1.1927711 1.4016173 1.1662050
## [78] 1.0522088 1.9076923 1.0477861 1.0093897 NA 1.1851852 0.9642995
## [85] 0.9670782 0.8921576 1.0206186 0.9559471 1.0218579 1.0996169 0.9764151
## [92] 1.0163043 1.0654255 1.0424077 1.0875912 1.6666667 2.3832168 NA
## [99] 1.1164179 0.9816147 1.6538462 1.0103093 0.9577703 1.0795964 1.0528000
## [106] 0.9371080 1.0110294 0.8523392 1.0370370 1.0000000 0.8714286 NA
## [113] 1.0367893 0.8851571 0.9935760 1.0387205 NA NA 1.0037360
## [120] 1.3128120 1.3611111 0.9814126 0.9948187 1.4523810 1.6376812 1.2145749
## [127] 1.4168112 1.3661270 1.3690764 1.3958381 1.6978682 1.8080439 1.6153846
## [134] 1.6371865 1.4590164 1.7750000 1.5096416 1.6000000 1.4000000 2.6875000
## [141] 0.9375000 1.0333333 1.0238095 1.2435897 0.9967949 1.9930727 1.0223097
## [148] 1.0122951 NA 1.0784314 1.0846645 1.2287234 1.0720721 1.0817439
## [155] 1.3554217 1.2279106 1.7293233 1.8583333 1.7475728 2.0135135 0.9241848
## [162] 0.9371429 1.0182704 1.0622711 NA NA 1.3171122 1.0869565
## [169] 1.2278481 2.0531375 2.1689338 0.9955291 0.9981269 1.1190476 0.9848283
## [176] 0.9440454 0.9742351 0.9277014 0.9442897 0.9832215 1.3033175 NA
## [183] NA 1.5431034 0.9953488 1.0201715 0.9343189 0.9978947 0.9461967
## [190] 0.9052453 1.1697026 1.0270270 0.9359694 NA 1.3600000 1.3191176
## [197] 1.1515152 1.6000000 1.1092437 1.0406504 NA 1.1452991 1.6239316
## [204] NA 1.1103025 1.1368421 1.0714286 1.0860484 1.0815650 1.0799824
## [211] 1.4444444 1.0778443 1.0312500
creating variable named “sex_ratio” [ratio of adult females to adult males]
# sex_ratio: number of adult females divided by number of adult males
d[["sex_ratio"]] <- d[["AdultFemale"]] / d[["AdultMales"]]
d[["sex_ratio"]]
## [1] NA 1.0000000 1.0000000 1.4347826 1.6058394 2.1724138
## [7] 1.1298701 1.2941176 1.0000000 1.0000000 NA 1.0000000
## [13] 1.0000000 1.0000000 1.0000000 NA 2.0588235 NA
## [19] NA 0.5750000 1.0000000 1.0000000 1.0142857 NA
## [25] 1.0000000 NA NA NA 1.0000000 1.0000000
## [31] 1.0000000 2.0000000 NA 1.6666667 0.5000000 1.1785714
## [37] 1.2608696 0.5000000 0.6666667 0.6666667 1.7105263 1.3611111
## [43] 1.4705882 1.9714286 NA 3.7500000 4.7500000 NA
## [49] 2.7777778 NA 8.5000000 4.0000000 3.0000000 7.0000000
## [55] NA NA NA 8.8000000 1.9000000 1.0000000
## [61] 5.0000000 4.8879310 5.0000000 NA NA NA
## [67] 1.0000000 1.0000000 6.0000000 1.8780488 NA NA
## [73] 2.0000000 1.9333333 1.0000000 2.6086957 NA 1.0000000
## [79] 6.9333333 0.8571429 NA 0.8333333 0.9079903 1.1666667
## [85] 0.9117647 1.0000000 1.0000000 0.6341463 0.9466667 NA
## [91] NA 1.0000000 1.0000000 NA NA NA
## [97] 2.4444444 NA 1.0000000 NA 1.0000000 1.0000000
## [103] 1.1111111 1.0000000 NA NA 1.0000000 1.0000000
## [109] 1.1232877 1.2307692 0.6250000 NA 0.8333333 1.0000000
## [115] NA 1.0000000 NA 1.0000000 1.0000000 1.8139535
## [121] NA NA 1.0000000 NA NA NA
## [127] 2.2553191 5.1470588 NA 5.2888889 6.2083333 3.4827586
## [133] NA 1.2125000 2.2903226 2.2962963 1.4893617 2.4444444
## [139] NA NA 1.0000000 1.0000000 1.0000000 2.0000000
## [145] 1.0000000 3.7000000 NA NA NA NA
## [151] 1.0000000 1.0000000 NA NA 1.5571429 2.8965517
## [157] 2.4305556 2.7714286 8.2000000 2.0746269 1.0000000 1.0000000
## [163] 1.9148936 5.1730769 NA 5.9606299 2.8571429 NA
## [169] 2.0000000 NA 1.0000000 1.1666667 6.0000000 1.6500000
## [175] NA NA 1.2500000 0.9863014 NA 1.6470588
## [181] 2.8461538 NA NA NA NA 0.6842105
## [187] 0.8750000 1.0000000 NA NA 0.9090909 NA
## [193] 0.6296296 0.6000000 NA 0.8900000 1.8372093 6.3888889
## [199] 1.0000000 NA NA NA 2.7333333 6.1666667
## [205] 15.6000000 3.1764706 2.3846154 2.8000000 NA 3.3809524
## [211] 3.8000000 NA 0.5680000
creating variable named “DI” [ratio of day range length to diameter of home range]
# DI: day range length divided by the *diameter* of the home range.
# HomeRange_km2 is an area, so it is first converted to a diameter; this
# assumes a circular home range (area = pi * (diameter / 2)^2, hence
# diameter = sqrt(4 * area / pi)). Dividing by the raw area would not give the
# ratio described.
home_range_diameter <- sqrt(4 * d$HomeRange_km2 / pi)
d$DI <- d$DayLength_km / home_range_diameter
d$DI
## [1] NA NA NA NA NA
## [6] 1.68421053 NA 5.50000000 NA NA
## [11] NA NA 6.74285714 33.00000000 NA
## [16] NA 1.00877193 NA 2.32876712 1.76470588
## [21] 46.00000000 NA 0.86538462 3.05084746 0.71428571
## [26] NA NA NA 7.85714286 14.68085106
## [31] 2.75862069 6.25000000 NA NA NA
## [36] 11.20000000 44.00000000 NA 100.00000000 145.00000000
## [41] 3.39622642 2.38636364 5.71428571 1.35294118 NA
## [46] NA 2.35294118 NA NA NA
## [51] 9.37500000 NA NA 1.28000000 NA
## [56] NA NA 10.00000000 NA 7.00000000
## [61] 1.30434783 NA 1.52173913 NA NA
## [66] NA NA NA 1.11111111 NA
## [71] NA NA 0.26495957 1.60000000 1.14000000
## [76] 0.22316176 NA 8.28651685 0.11331445 7.55769231
## [81] NA 3.57142857 7.41463415 2.75925926 NA
## [86] 16.95238095 2.22000000 1.45119863 10.82857143 NA
## [91] NA NA NA NA 176.00000000
## [96] NA 0.21339950 1.90476190 43.00000000 NA
## [101] NA 4.48275862 4.68750000 5.00000000 11.66666667
## [106] 3.46153846 5.65217391 2.08333333 0.39007092 18.00000000
## [111] NA NA 5.53846154 NA NA
## [116] NA NA NA NA 0.60606061
## [121] NA NA NA NA NA
## [126] NA 3.26086957 0.65034965 NA 0.27072758
## [131] 1.02857143 NA NA 1.98863636 1.85227273
## [136] NA 0.63269962 NA NA 0.11389522
## [141] NA NA NA 3.00000000 NA
## [146] 0.17426273 1.49425287 NA NA NA
## [151] NA NA NA NA 0.08592911
## [156] 0.35023041 0.95238095 0.39464883 0.63327576 0.61403509
## [161] NA NA 1.30952381 3.20245399 NA
## [166] 4.70796460 0.99846154 NA 11.71875000 NA
## [171] 0.12886598 1.31578947 7.33333333 2.16428571 NA
## [176] NA NA NA NA 28.33333333
## [181] NA NA 0.05240000 NA NA
## [186] 8.00000000 NA 9.36363636 NA NA
## [191] 8.80952381 NA 31.66666667 NA NA
## [196] 34.00000000 2.64705882 0.38461538 2.39616613 45.00000000
## [201] NA 28.75000000 2.15517241 NA 2.91666667
## [206] NA NA 1.45454545 1.04384134 0.75581395
## [211] NA NA 0.73453237
Plotting the relationship between day range length and time spent moving
# Day range length (y) against time spent moving (x), both untransformed
plot(d$DayLength_km ~ d$Move)
It appears that there’s not much of a relationship between time spent moving and day range length; however, most species appear to spend less than 40 mins per day moving.
Plotting the relationship between day range length and log(time spent moving)
# Day range length (y) against log-transformed time spent moving (x)
plot(d$DayLength_km ~ log(d$Move))
There still appears to not be much of a relationship
Plotting the relationship between log(day range length) and time spent moving
# Log-transformed day range length (y) against time spent moving (x)
plot(log(d$DayLength_km) ~ d$Move)
Data seems skewed to the left
Plotting the relationship between log(day range length) and log(time spent moving)
# Both variables log-transformed: log(day range length) against log(move)
plot(log(d$DayLength_km) ~ log(d$Move))
Log-transforming both variables shows that overall, species that do spend more time moving travel farther than those who don’t
Plotting the relationship between log(day range length) and log(time spent moving), grouped by family
# Scatterplot of log(day range length) against log(time spent moving),
# coloured by family. Columns are referenced by bare name inside aes() so they
# are looked up in the plot data; ggplot2 discourages `d$Family` there and
# warns about it.
p <- ggplot(
  data = d,
  aes(x = log(Move), y = log(DayLength_km), color = factor(Family))
) +
  xlab("log(Move)") +
  ylab("log(DayLength_km)") +
  geom_point(na.rm = TRUE) + # drop rows with missing values without a warning
  theme(legend.position = "bottom", legend.title = element_blank())
p
adding marginal univariate plots
# Add marginal density/histogram plots to p. Calling ggMarginal() through its
# namespace avoids attaching ggExtra to the search path and detaching it again.
ggExtra::ggMarginal(p, type = "densigram")
setting up a grid for faceting by a grouping variable
# One panel per family, laid out in four columns, with the legend hidden
p <- p +
  facet_wrap(~Family, ncol = 4) +
  theme(legend.position = "none")
p
Plotting the relationship between day range length and mean group size
# Day range length (y) against mean group size (x), both untransformed
plot(d$DayLength_km ~ d$MeanGroupSize)
Plotting the relationship between log(day range length) and mean group size
# Log-transformed day range length (y) against mean group size (x)
plot(log(d$DayLength_km) ~ d$MeanGroupSize)
Plotting the relationship between day range length and log(mean group size)
# Untransformed day range length (y) against log-transformed mean group size
# (x). The previous call repeated the log(day range length) ~ group size plot
# instead of transforming the x variable.
plot(d$DayLength_km ~ log(d$MeanGroupSize))
Plotting the relationship between log(day range length) and log(mean group size)
# Both variables log-transformed: log(day range length) ~ log(mean group size)
plot(log(d$DayLength_km) ~ log(d$MeanGroupSize))
Plotting the relationship between day range length and mean group size, grouped by family
# Scatterplot of untransformed day range length against untransformed mean
# group size, coloured by family. The axis labels name the raw variables
# because nothing is log-transformed in this plot. Columns are referenced by
# bare name inside aes(); ggplot2 discourages `d$Family` there and warns
# about it.
p <- ggplot(
  data = d,
  aes(x = MeanGroupSize, y = DayLength_km, color = factor(Family))
) +
  xlab("MeanGroupSize") +
  ylab("DayLength_km") +
  geom_point(na.rm = TRUE) + # drop rows with missing values without a warning
  theme(legend.position = "bottom", legend.title = element_blank())
p
adding marginal univariate plots
# Add marginal density/histogram plots to p. Calling ggMarginal() through its
# namespace avoids attaching ggExtra to the search path and detaching it again.
ggExtra::ggMarginal(p, type = "densigram")
setting up a grid for faceting by a grouping variable
# One panel per family, laid out in four columns, with the legend hidden
p <- p +
  facet_wrap(~Family, ncol = 4) +
  theme(legend.position = "none")
p
no apparent relationship between day range length and mean group size
Plotting the relationship between log(day range length) and log(mean group size), grouped by family
# Scatterplot of log(day range length) against log(mean group size), coloured
# by family. Columns are referenced by bare name inside aes(); ggplot2
# discourages `d$Family` there and warns about it.
p <- ggplot(
  data = d,
  aes(x = log(MeanGroupSize), y = log(DayLength_km), color = factor(Family))
) +
  xlab("log(MeanGroupSize)") +
  ylab("log(DayLength_km)") +
  geom_point(na.rm = TRUE) + # drop rows with missing values without a warning
  theme(legend.position = "bottom", legend.title = element_blank())
p
adding marginal univariate plots
# Add marginal density/histogram plots to p. Calling ggMarginal() through its
# namespace avoids attaching ggExtra to the search path and detaching it again.
ggExtra::ggMarginal(p, type = "densigram")
setting up a grid for faceting by a grouping variable
# One panel per family, laid out in four columns, with the legend hidden
p <- p +
  facet_wrap(~Family, ncol = 4) +
  theme(legend.position = "none")
p
Cebidae appears to have a positive relationship between day range length and mean group size when both variables are log-transformed
Plotting the relationship between body size dimorphism and canine size dimorphism, both overall and by family
Plotting the relationship between body size dimorphism and canine size dimorphism, overall
# Canine dimorphism (y) against body size dimorphism (x), both untransformed
plot(d$Canine_Dimorphism ~ d$bsd)
Plotting the relationship between log(body size dimorphism) and log(canine size dimorphism), overall
# Both variables log-transformed: log(canine dimorphism) ~ log(bsd)
plot(log(d$Canine_Dimorphism) ~ log(d$bsd))
Points more spread out, positive, nonlinear relationship between bsd and Canine dimorphism
Plotting the relationship between body size dimorphism and canine size dimorphism, grouped by family
# Scatterplot of canine dimorphism against body size dimorphism (bsd),
# coloured by family. Columns are referenced by bare name inside aes();
# ggplot2 discourages `d$Family` there and warns about it.
p <- ggplot(
  data = d,
  aes(x = bsd, y = Canine_Dimorphism, color = factor(Family))
) +
  xlab("bsd") +
  ylab("Canine_Dimorphism") +
  geom_point(na.rm = TRUE) + # drop rows with missing values without a warning
  theme(legend.position = "bottom", legend.title = element_blank())
p
adding marginal univariate plots
# Add marginal density/histogram plots to p. Calling ggMarginal() through its
# namespace avoids attaching ggExtra to the search path and detaching it again.
ggExtra::ggMarginal(p, type = "densigram")
setting up a grid for faceting by a grouping variable
# One panel per family, laid out in four columns, with the legend hidden
p <- p +
  facet_wrap(~Family, ncol = 4) +
  theme(legend.position = "none")
p
Cercopithecidae exhibits somewhat of a linear relationship between bsd and Canine dimorphism, Cebidae does as well yet with fewer data points (limited certainty)
Plotting the relationship between log(body size dimorphism) and log(canine size dimorphism), grouped by family
# Scatterplot of log(canine dimorphism) against log(body size dimorphism),
# coloured by family. Columns are referenced by bare name inside aes();
# ggplot2 discourages `d$Family` there and warns about it.
p <- ggplot(
  data = d,
  aes(x = log(bsd), y = log(Canine_Dimorphism), color = factor(Family))
) +
  xlab("log(bsd)") +
  ylab("log(Canine_Dimorphism)") +
  geom_point(na.rm = TRUE) + # drop rows with missing values without a warning
  theme(legend.position = "bottom", legend.title = element_blank())
p
adding marginal univariate plots
# Add marginal density/histogram plots to p. Calling ggMarginal() through its
# namespace avoids attaching ggExtra to the search path and detaching it again.
ggExtra::ggMarginal(p, type = "densigram")
setting up a grid for faceting by a grouping variable
# One panel per family, laid out in four columns, with the legend hidden
p <- p +
  facet_wrap(~Family, ncol = 4) +
  theme(legend.position = "none")
p
Cercopithecidae also exhibits somewhat of a linear relationship between log (bsd) and log (Canine dimorphism), and Cebidae does as well yet with fewer data points (limited certainty)
Diet strategy analysis
# Classify each species' diet from the Fruit and Leaves columns.
# read_csv() parsed Leaves as character, so comparing it with 50 directly is a
# string comparison (for example "6" >= "50" is TRUE). Convert it to numeric
# first. Entries that are not numbers become NA; such rows get an NA diet
# unless Fruit >= 50. The coercion warning for those entries is expected and
# suppressed for this one call only.
leaves_num <- suppressWarnings(as.numeric(d$Leaves))
d <- mutate(
  d,
  diet = case_when(
    Fruit >= 50 ~ "frugivore",
    leaves_num >= 50 ~ "folivore",
    Fruit < 50 & leaves_num < 50 ~ "omnivore"
  )
)
Boxplots of group size vs dietary strategies
# Distribution of mean group size within each diet category
boxplot(d$MeanGroupSize ~ d$diet)
Frugivores, on average, live in smaller groups than folivores; however, there’s much larger variability in the frugivore data compared to the folivore data
One-line code using the forward pipe operator
# Per-family mean brain size and mean male body mass (missing values ignored),
# sorted from largest to smallest mean brain size. Binomial is built and
# selected here, but summarise() keeps only Family and the two means.
s <- d %>%
  mutate(Binomial = paste(Genus, Species, sep = " ")) %>%
  select(Binomial, Family, Brain_Size_Species_Mean, Body_mass_male_mean) %>%
  group_by(Family) %>%
  summarise(
    avgbrainsize = mean(Brain_Size_Species_Mean, na.rm = TRUE),
    avgbodymass = mean(Body_mass_male_mean, na.rm = TRUE)
  ) %>%
  arrange(desc(avgbrainsize))
s
## # A tibble: 14 × 3
## Family avgbrainsize avgbodymass
## <chr> <dbl> <dbl>
## 1 Hominidae 410. 98681.
## 2 Hylobatidae 101. 6926.
## 3 Cercopithecidae 85.4 9543.
## 4 Atelidae 80.6 7895.
## 5 Pitheciidae 56.3 1955.
## 6 Daubentonidae 44.8 2620
## 7 Indriidae 27.3 3638.
## 8 Cebidae 23.9 1012.
## 9 Lemuridae 23.1 2077.
## 10 Lorisidae 8.67 512.
## 11 Lepilemuridae 7.27 792
## 12 Galagidae 5.96 395.
## 13 Cheirogalidae 4.04 193.
## 14 Tarsiidae 3.26 131